library(tidyverse)
library(patchwork)
library(geofi)
library(pxweb)
library(scales)
library(lubridate)
library(data.table)
library(gridExtra)px_12at <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/kuol/statfin_kuol_pxt_12at.px",
query = list("Tiedot"=c("vm01", "vm11", "vaesto"), "Vuosi"=c("*")))
df_12at <- as_tibble(as.data.frame(px_12at, column.name.type = "text", variable.value.type = "text"))
df_12at <- df_12at |>
rename(Live_births = "Live births") |>
mutate(Year = as.integer(Year),
Death_rate = Deaths / Population * 100000)
# Source: http://www.saunalahti.fi/arnoldus/kuolovuo.html
labels = tribble(
~Year, ~Deaths, ~Label,
1868, 137720, "Finnish famine (1866–1868)",
1918, 92102, "Civil War\n(1918)",
1948, 63846, "Winter,\nContinuation &\nLapland Wars\n(1939–45)",
1833, 55038, "Smallpox,\ndysentery\n& influenza\n(1833)",
1806, 51942, "Finnish War\n(1808-09)",
2008, 48659, "In 2021 there were\n57,659 deaths in\nFinland, highest\nsince 1940s"
)
ggplot(df_12at, aes(Year, Deaths)) +
geom_line(size = 1.1, color = "#505085") +
scale_x_continuous(breaks = seq(1750, 2020, 25)) +
scale_y_continuous(labels = scales::comma, breaks = seq(0, 150000, 10000)) +
geom_text(aes(label=Label), size = 4.5, vjust = -0.5, data=labels) +
labs(subtitle = "Deaths",
y = NULL,
x = NULL) +
theme_minimal() -> yearly_deaths_plot
yearly_plot <- function(df, y_stat, subtitle) {
ggplot(df, aes(Year, {{y_stat}})) +
geom_line(color = "#505085") +
scale_x_continuous(breaks = seq(1750, 2020, 50)) +
scale_y_continuous(labels = scales::comma) +
labs(subtitle = subtitle,
y = NULL,
x = NULL) +
theme_minimal() +
theme(text = element_text(size = 11))
}
yearly_population_plot <- yearly_plot(df_12at, Population, "Population")
yearly_births_plot <- yearly_plot(df_12at, Live_births, "Live births")
yearly_death_rate_plot <- yearly_plot(df_12at, Death_rate, "Deaths / 100,000 people")
layout <- "
AAA
AAA
BCD
"
(yearly_deaths_plot + yearly_population_plot +
yearly_births_plot + yearly_death_rate_plot +
plot_layout(design = layout)) +
plot_annotation(title = "Yearly deaths, population and births in Finland, 1749 - 2021",
caption = "source: Tilastokeskus 12at -- Vital statistics and population, 1749-2021"
) -> plot_yearly
ggsave("images//1_plot_yearly.png", plot_yearly, device = "png", dpi = 96,
width = 9, height = 9, units = c("in"))px_12ah <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/kuol/statfin_kuol_pxt_12ah.px",
query = list("Tapahtumakuukausi" = c("M01", "M02", "M03", "M04", "M05", "M06",
"M07", "M08", "M09", "M10", "M11", "M12"),
"Tiedot"=c("*"), "Vuosi"=c("*")))
df_12ah <- as_tibble(as.data.frame(px_12ah, column.name.type = "text", variable.value.type = "text"))
avg_days_per_month = c(31, 28.25, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
df_12ah |>
rename(Month = "Month of occurrence") |>
mutate(Year = as.integer(Year),
Month = as_factor(Month),
Deaths_daily = as.integer(Deaths / avg_days_per_month),
Month_short = as_factor(rep(month.abb, n()/12)),
Decade = as.character(paste0(Year - Year %% 10, "s")),
Decade = str_replace_all(Decade, "1940s", "1945-1949")
) -> df_12ah
df_12ah |>
group_by(Month) |>
summarise(Deaths_daily = round(mean(Deaths_daily))) |>
ggplot(aes(Month, Deaths_daily)) +
geom_col() +
geom_text(aes(label = Deaths_daily), vjust = 1.5,
color = "white", size = 4) +
theme_bw() +
labs(y = NULL,
x = NULL) -> daily_deaths_month_plot
df_12ah |>
filter(Decade != "2020s") |>
group_by(Decade, Month_short) |>
summarise(Deaths_daily = round(mean(Deaths_daily))) |>
ggplot(aes(Month_short, Deaths_daily)) +
geom_col() +
theme_bw() +
facet_wrap(vars(Decade), nrow = 2) +
geom_text(aes(label = Deaths_daily),
color = "white", size = 3.2, angle = 270, hjust = -0.1) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.15)) +
labs(subtitle = "By decade",
y = NULL,
x = NULL,
caption = "source: Tilastokeskus 12ah -- Deaths by month, 1945-2021"
) -> daily_deaths_decade_plot
plot_months <- daily_deaths_month_plot / daily_deaths_decade_plot +
plot_annotation(title = "Daily deaths in Finland by month (1945-2021)",
subtitle = "Death rates are higher in winter months")
ggsave("images//2_plot_months.png", plot_months, device = "png", dpi = 96,
width = 9, height = 9, units = c("in"))# Get the correspondence table between municipalities and regions in 2020
# https://www.stat.fi/en/luokitukset/corrmaps/kunta_1_20200101%23maakunta_1_20200101/
region_keys_2020 <- fread("kunta_1_20200101%23maakunta_1_20200101.csv", encoding = "Latin-1",
col.names = c("id_1", "Municipality", "id_2", "Region"), header = F)
# Get populations and ages per region per year
px_11rf <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/vaerak/statfin_vaerak_pxt_11rf.px",
query = list(
"Kunta" = c("*"),
"Sukupuoli" = c("1", "2"),
"Vuosi" = c("2016", "2017", "2018", "2019", "2020"),
"Ikä" = c("*"),
"Tiedot" = c("*")))
df_11rf <- as_tibble(as.data.frame(px_11rf, column.name.type = "text", variable.value.type = "text"))
df_11rf |>
drop_na() |>
filter(Municipality != "WHOLE COUNTRY",
Age != "Total") |>
left_join(region_keys_2020, by = "Municipality") |>
mutate(Age = ifelse(Age == "100 -", "100", Age),
Age = as.integer(Age)) |>
mutate(Age_group = case_when(Age == 0 ~ "0",
between(Age, 1, 4) ~ "1 - 4",
between(Age, 5, 9) ~ "5 - 9",
between(Age, 10, 14) ~ "10 - 14",
between(Age, 15, 19) ~ "15 - 19",
between(Age, 20, 24) ~ "20 - 24",
between(Age, 25, 29) ~ "25 - 29",
between(Age, 30, 34) ~ "30 - 34",
between(Age, 35, 39) ~ "35 - 39",
between(Age, 40, 44) ~ "40 - 44",
between(Age, 45, 49) ~ "45 - 49",
between(Age, 50, 54) ~ "50 - 54",
between(Age, 55, 59) ~ "55 - 59",
between(Age, 60, 64) ~ "60 - 64",
between(Age, 65, 69) ~ "65 - 69",
between(Age, 70, 74) ~ "70 - 74",
between(Age, 75, 79) ~ "75 - 79",
between(Age, 80, 84) ~ "80 - 84",
between(Age, 85, 89) ~ "85 - 89",
between(Age, 90, 94) ~ "90 - 94",
between(Age, 95, 150) ~ "95 -",)) |>
rename(Population = "Population 31 Dec") |>
select(Municipality, Year, Sex, Population, Region, Age_group) |>
rename(Age = Age_group) |>
mutate(Region = case_when(Municipality == "Juankoski" ~ "North Savo",
Municipality == "Luvia" ~ "Satakunta",
Municipality == "Valtimo" ~ "North Karelia",
TRUE ~ Region)) |>
group_by(Region, Year, Age, Sex) |>
summarise(Population = sum(Population)) -> df_11rf
df_11rf |>
group_by(Year, Age, Sex) |>
summarise(Population_All_Regions = sum(Population)) -> df_all_regions
df_11rf |>
left_join(df_all_regions, by = c("Year", "Age", "Sex")) -> df_11rf
# Get Deaths by Underlying cause of death (time series classification), Age, Information, Gender and Year
px_11bs <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/ksyyt/statfin_ksyyt_pxt_11bs.px",
query = list(
"Tilaston peruskuolemansyy (aikasarjaluokitus)" = c("*"),
"Sukupuoli" = c("1", "2"),
"Vuosi" = c("2016", "2017", "2018", "2019", "2020"),
"Ikä" = c("*"),
"Tiedot" = c("*")))
df_11bs <- as_tibble(as.data.frame(px_11bs, column.name.type = "text", variable.value.type = "text"))
df_11bs$`Underlying cause of death (time series classification)` |> unique() -> causes
df_11bs |>
rename(Cause = "Underlying cause of death (time series classification)",
Sex = Gender) |>
filter(Age != "Total") -> df_11bs
df_11rf |>
left_join(df_11bs, by = c("Age", "Sex", "Year")) |>
rename(Deaths_All_Regions = Deaths) |>
mutate(Expected = (Population / Population_All_Regions) * Deaths_All_Regions) |>
group_by(Region, Cause) |>
summarise(Expected = round(sum(Expected), 2)) -> df_11rf
# For causes of death per region
px_11bt <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/ksyyt/statfin_ksyyt_pxt_11bt.px",
query = list(
"Maakunta" = c("MK01", "MK02", "MK04", "MK05", "MK06", "MK07", "MK08", "MK09", "MK10",
"MK11", "MK12", "MK13", "MK14", "MK15", "MK16", "MK17", "MK18", "MK19", "MK21"),
"Tilaston peruskuolemansyy (aikasarjaluokitus)" = c("*"),
"Vuosi" = c("2016", "2017", "2018", "2019", "2020"),
"Tiedot" = c("*")))
df_11bt <- as_tibble(as.data.frame(px_11bt, column.name.type = "text", variable.value.type = "text"))
# For Finnish translations
px_11bt_fi <- pxweb_get(url =
"https://pxdata.stat.fi:443/PxWeb/api/v1/fi/StatFin/ksyyt/statfin_ksyyt_pxt_11bt.px",
query = list(
"Maakunta" = c("MK01", "MK02", "MK04", "MK05", "MK06", "MK07", "MK08", "MK09", "MK10",
"MK11", "MK12", "MK13", "MK14", "MK15", "MK16", "MK17", "MK18", "MK19", "MK21"),
"Tilaston peruskuolemansyy (aikasarjaluokitus)" = c("*"),
"Vuosi" = c("2020"),
"Tiedot" = c("*")))
df_11bt_fi <- as_tibble(as.data.frame(px_11bt_fi, column.name.type = "text", variable.value.type = "text"))
df_11bt_fi |>
rename(Cause_fi = "Tilaston peruskuolemansyy (aikasarjaluokitus)") |>
separate(Maakunta, into = c("ID", "Region_fi"), sep = " ", extra = "merge") |>
mutate(Cause_id = str_match(Cause_fi, "[:graph:]+[:space:]")) |>
select(ID, Region_fi, Cause_fi, Cause_id) -> df_11bt_fi
df_11bt |>
rename(Cause = "Underlying cause of death (time series classification)") |>
mutate(Year = as.integer(Year)) |>
separate(Region, into = c("ID", "Region"), sep = " ", extra = "merge") |>
group_by(ID, Region, Cause) |>
summarise(Deaths = sum(Deaths)) |>
mutate(Cause_id = str_match(Cause, "[:graph:]+[:space:]")) -> df_11bt
df_11bt |>
left_join(df_11bt_fi, by = c("ID", "Cause_id")) -> df_11bt
df_11rf |>
left_join(df_11bt, by = c("Region", "Cause")) |>
mutate(SMR = round(Deaths / Expected, 2)) |> # Standardized mortality ratio
select(Region_fi, Region, Cause_fi, Cause, Deaths, Expected, SMR
) -> df_11rf
# Get map
df_region_map <- get_municipalities(year = 2020, scale = 4500)
df_region_map <- df_region_map |>
group_by(maakunta_name_en) |>
summarise() |>
rename(Region = maakunta_name_en)
df_cause_region <- df_region_map |>
left_join(df_11rf, by = "Region")
# Function: Create plots
by_cause <- function(cause_en){
df_cause_region |>
filter(Cause == cause_en) -> df
subtitle_1 <- paste0(df[["Cause"]][[1]], "\n",
df[["Cause_fi"]][[1]])
subtitle_1 <- str_remove_all(subtitle_1, "\\(.+\\)")
ggplot(df) +
geom_sf(aes(geometry = geom, fill = SMR)) +
scale_fill_gradient2(low = muted("blue"),
mid = "white",
high = muted("red"),
midpoint = 1,
space="Lab") +
labs(fill = NULL) +
theme(legend.position = c(0.2, 0.65),
legend.background = element_blank(),
legend.key.size = unit(0.2, "in"),
legend.text = element_text(size = 9),
plot.title = element_text(size = 11),
plot.subtitle = element_text(size = 10),
plot.title.position = "plot") -> p
df |>
ungroup() |>
filter(Cause == cause_en) |>
column_to_rownames(var = "Region_fi") |>
arrange(desc(SMR)) |>
select(Deaths, Expected, SMR) -> df
p <- p + tableGrob(df, theme = ttheme_default(base_size = 9)) +
plot_annotation(title = "Deaths vs expected deaths (2016-2020) for:",
subtitle = subtitle_1,
theme = theme(plot.title = element_text(size = 11),
plot.subtitle = element_text(size = 14)))
}
# Save plots
for (i in seq(from = 1, to = length(causes))) {
p <- by_cause(causes[i])
name = paste0("images//region//", as.character(i), ".png")
ggsave(filename = name, p, device = "png", dpi = 96,
width = 7.3, height = 6, units = c("in"))
}
px_12am <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/kuol/statfin_kuol_pxt_12am.px",
query = list( "Sukupuoli" = c("1", "2"), "Vuosi" = c("*"), "Tiedot" = c("*")))
df_12am <- as_tibble(as.data.frame(px_12am, column.name.type = "text", variable.value.type = "text"))
df_12am <- df_12am |>
rename(Life_exp = "Life expectancy at birth, years") |>
mutate(Year = str_sub(Year, 1, 4)) |>
mutate(Year = as.integer(Year),
Sex = as_factor(Sex))
ggplot(df_12am, aes(Year, Life_exp, color = Sex)) +
geom_line(size = 1.1) +
scale_colour_hue(direction = -1) +
theme(legend.position = c(0.25, 0.75),
legend.background = element_blank(),
plot.title = element_text(hjust = 0.06),
plot.tag.position = c(0.2, 1)) +
labs(subtitle = "Life expectancy by year and sex (1751-2021)", y = NULL, x = NULL,
caption = "source: Tilastokeskus 12am -- Life expectancy at birth by sex, 1751-2021"
) -> life_exp_plot
# Survivors of 100k born alive
px_12ap <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/kuol/statfin_kuol_pxt_12ap.px",
query = list("Sukupuoli" = c("1", "2"), "Vuosi" = c("*"),
"Tiedot" = c("elo100000synt")))
df_12ap <- as_tibble(as.data.frame(px_12ap, column.name.type = "text", variable.value.type = "text"))
df_12ap |>
rename(survivors_100k = "Survivors of 100,000 born alive") |>
mutate(Sex = as_factor(Sex),
Year = as.integer(Year),
Age = as.integer(Age)) |>
group_by(Sex, Age) |>
summarise(survivors_100k = mean(survivors_100k)) |>
ggplot(aes(Age, survivors_100k, color = Sex)) +
geom_line(size = 1.1) +
scale_colour_hue(direction = -1) +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.06),
plot.tag.position = c(0.2, 1)) +
labs(subtitle = "Survival of 100,000 born alive by age and sex", y = NULL, x = NULL,
caption = "source: Tilastokeskus 12ap -- Life table by age and sex, 1986-2020"
) -> survivors_plot
# Life expectancy by region
px_12an <- pxweb_get(url =
"https://statfin.stat.fi:443/PxWeb/api/v1/en/StatFin/kuol/statfin_kuol_pxt_12an.px",
query = list(
"Maakunta" = c("MK01", "MK02", "MK04", "MK05", "MK06", "MK07", "MK08", "MK09", "MK10",
"MK11", "MK12", "MK13", "MK14", "MK15", "MK16", "MK17", "MK18", "MK19", "MK21"),
"Vuosi" = c("2020"),
"Sukupuoli" = c("1", "2"),
"Tiedot" = c("*")))
df_12an <- as_tibble(as.data.frame(px_12an, column.name.type = "text", variable.value.type = "text"))
df_12an <- df_12an |>
rename(Life_exp = "Life expectancy at birth, years") |>
mutate(Region = str_remove_all(Region, "MK.. "))
df_life_region <- df_region_map |>
left_join(df_12an, by = c("Region"))
df_life_region |>
filter(Sex == "Females") |>
ggplot() +
geom_sf(aes(geometry = geom, fill = Life_exp)) +
scale_fill_distiller(palette = "Spectral", direction = 1) +
labs(title = "Life expectancy by region (2018-2020)", subtitle = "Females", fill = NULL) +
theme(legend.position = c(0.2, 0.6),
legend.background = element_blank(),
legend.key.size = unit(0.15, "in"),
legend.text = element_text(size = 8),
panel.background = element_rect(colour = "#CC79A7"),
plot.margin = margin(r = 0, unit = "cm"),
plot.title = element_text(size = 11),
plot.subtitle = element_text(size = 10)
) -> life_exp_region_f_plot
df_life_region |>
filter(Sex == "Males") |>
ggplot() +
geom_sf(aes(geometry = geom, fill = Life_exp)) +
scale_fill_distiller(palette = "Spectral", direction = 1) +
labs(subtitle = "Males", fill = NULL) +
theme(legend.position = c(0.2, 0.6),
legend.background = element_blank(),
legend.key.size = unit(0.15, "in"),
legend.text = element_text(size = 8),
panel.background = element_rect(colour = "#0072B2"),
plot.subtitle = element_text(size = 10)
) -> life_exp_region_m_plot
layout <- "
AA
AA
BB
BB
CD
CD
CD
"
plot_life_exp <- (life_exp_plot + survivors_plot +
life_exp_region_f_plot + life_exp_region_m_plot +
plot_layout(design = layout)) +
plot_annotation(caption = "source: Tilastokeskus 12an -- Life expectancy at birth by sex and region")
ggsave("images//plot_life_exp.png", plot_life_exp, device = "png", dpi = 120,
width = 7.5, height = 10, units = c("in"))